# Authors: Emil & Ivetta

# This scrip produces following figures and tables:
# Figure 8 and 9 in Appendix
# Tables 20-22, 27-28 in Appendix

# this script does subgroup conjoint analysis
# beware cjplot package needs categorical variables to be factors, otherwise it doesn't understand

pacman::p_load(haven, dplyr, labelled, xlsx, cjoint, sandwich, lmtest, cregg, car, stringr, 
               ggtext, glue, ggsci, stargazer)
#install.packages("ggstance")

#import conjoint df enriched by all the vars from the second wave survey
df <- read_sav("drafts/conjoint_solidarity/perspectives_docs/revision_2/replication_code/input_data.sav")
df <- as_factor(df)



#setting cool ggtheme
theme_Publication <- function(base_size=18, base_family="Helvetica") {
  library(grid)
  library(ggthemes)
  (theme_foundation(base_size=base_size, base_family=base_family)
    + theme(plot.title = element_text(face = "bold",
                                      size = rel(0.9), hjust = 0.5),
            text = element_text(),
            panel.background = element_rect(colour = NA),
            plot.background = element_rect(colour = NA),
            panel.border = element_rect(colour = NA),
            axis.title = element_text(face = "bold",size = rel(0.8)),
            axis.title.y = element_text(angle=90,vjust =2),
            axis.title.x = element_text(vjust = -0.2),
            axis.text = element_text(), 
            axis.line = element_line(colour="black"),
            axis.ticks = element_line(),
            panel.grid.major = element_line(colour="#f0f0f0"),
            panel.grid.minor = element_blank(),
            legend.key = element_rect(colour = NA),
            legend.position = "bottom",
            legend.direction = "horizontal",
            legend.key.size= unit(0.2, "cm"),
            legend.margin = unit(0, "cm"),
            legend.title = element_text(face="italic"),
            plot.margin=unit(c(10,5,5,5),"mm"),
            strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"),
            strip.text = element_text(face="bold")
    ))
  
}



# Define labels to be bold in the plot
bold_labels <- c("Age:", "Gender:", "Children:", "Profession:", "Ethnic background:", "Motivation for emigration:") %>%
  paste0(collapse = "|")

# Custom function to apply bold to specific labels
highlight <- function(x, pat, color="black", family="") {
  # Remove parentheses
  x <- gsub("[()]", "", x)
  
  # Apply bold to labels that match the pattern
  ifelse(grepl(pat, x), glue::glue("<b style='font-family:{family}; color:{color}'>{x}</b>"), x)
}


################################################
################################################
################################################
################################################
# Fine-grained regressions
################################################
################################################
################################################
################################################

# Adding a break line for plotting
df_mig <- df %>%
  mutate(Motivation = fct_recode(Motivation,
                                 `Can't accept Russian politics +<br />Was arrested in rallies` = "Can't accept Russian politics +\nWas arrested in rallies"))




# Guilt
# calculate conditional AMCEs for all guilt levels and Regression for continuous Guilt

df_guilt <- df_mig %>%
  filter(when_left %in% "Да") %>% #filter out old emigrants
  select(ResponseId, 
         choice,
         Age, Gender, Children, Profession, Ethnicity, Motivation,
         guilt_eng) %>%
  mutate(guilt_num = as.numeric(guilt_eng)) %>%
  na.omit() 

guilt_amces <- cj(df_guilt, choice ~ Age + Gender + Children + Profession + Ethnicity + Motivation, 
                  id = ~ResponseId, 
                  estimate = "amce", 
                  by = ~guilt_eng)

label_rename <- c(
  Age = "Age:",
  Gender = "Gender:",
  Children = "Children:",
  Profession = "Profession:",
  Ethnicity = "Ethnic background:",
  Motivation = "Motivation for emigration:"
)

guilt_amces$feature <- ifelse(guilt_amces$feature %in% names(label_rename), 
                     label_rename[guilt_amces$feature], 
                     guilt_amces$feature)


guilt_many_amce_plot <- plot(rbind(guilt_amces), size = 2, legend_pos = "none") + 
  ggplot2::facet_wrap(~BY, ncol = 5L) + 
  ggplot2::theme_bw() +
  ggplot2::theme(legend.position = "none") + 
  geom_point(size = 2) +
  scale_color_nejm() +
  scale_y_discrete(labels= function(x) highlight(x, bold_labels, "black")) +
  theme(axis.text.y=element_markdown())

# saving responsiblity plot
ggsave("drafts/conjoint_solidarity/plots/extra_plots/guilt_many_amce_plot.png",
       guilt_many_amce_plot,
       dpi = 500,
       width = 11,
       height = 5.5)


# Checking regression with interaction term (to use all the information from continous variable)

model_cj_guilt <- lm(choice ~ Age + Gender + Children + Profession + Age + Motivation + Ethnicity*guilt_num, 
                     data = df_guilt)

# calculate the clustered standard errors
clust_se <- coeftest(model_cj_guilt, 
                     vcov. = vcovCL,  # by default uses HC1
                     cluster = ~ ResponseId, 
                     data = df_guilt)

# print the table using stargazer
stargazer(model_cj_guilt,
          se = list(clust_se[, "Std. Error"]), 
          type = "text", 
          title = "Guilt and Ethnicity interaction. Clustered Standard Errors (HC1)",
          covariate.labels = c("50 y/o (ref = 30 y/o)", 
                               "Woman (ref = Man)", 
                               "Have kids under 18 (ref = No children)", 
                               "Journalist (ref = Office worker)",
                               "Health worker (ref = Office worker)",
                               "Political motivation (ref = Economic motivation)", 
                               "Political motivation + repressions (ref = Economic motivation)",
                               "Ukrainian origin (ref = Russian origin)", 
                               "Buryat origin (ref = Russian origin)",
                               "Guilt (continuous)",
                               "Ukrainian origin X Guilt (continuous)",
                               "Buryat origin X Guilt (continuous)"))



# Responsibility

# calculate conditional AMCEs for all guilt levels

df_responsiblity <- df_mig %>%
  filter(when_left %in% "Да") %>% #filter out old emigrants
  select(ResponseId, 
         choice,
         Age, Gender, Children, Profession, Ethnicity, Motivation,
         responsiblity_eng) %>%
  mutate(responsiblity_num = as.numeric(responsiblity_eng)) %>%
  na.omit() 

resp_amces <- cj(df_responsiblity, choice ~ Age + Gender + Children + Profession + Ethnicity + Motivation, 
                 id = ~ResponseId, 
                 estimate = "amce", 
                 by = ~responsiblity_eng)

label_rename <- c(
  Age = "Age:",
  Gender = "Gender:",
  Children = "Children:",
  Profession = "Profession:",
  Ethnicity = "Ethnic background:",
  Motivation = "Motivation for emigration:"
)

resp_amces$feature <- ifelse(resp_amces$feature %in% names(label_rename), 
                              label_rename[resp_amces$feature], 
                             resp_amces$feature)


resp_many_amce_plot <- plot(rbind(resp_amces), size = 2, legend_pos = "none") + 
  ggplot2::facet_wrap(~BY, ncol = 5L) + 
  ggplot2::theme_bw() +
  ggplot2::theme(legend.position = "none")  + 
  geom_point(size = 2) +
  scale_color_nejm() +
  scale_y_discrete(labels= function(x) highlight(x, bold_labels, "black")) +
  theme(axis.text.y=element_markdown())

# saving responsiblity plot
ggsave("drafts/conjoint_solidarity/plots/extra_plots/resp_many_amce_plot.png",
       resp_many_amce_plot,
       dpi = 500,
       width = 11, height = 5.5)



model_cj_repr <- lm(choice ~ Age + Gender + Children + Profession + Age + Motivation + Ethnicity*responsiblity_num, 
                    data = df_responsiblity)

# calculate the clustered standard errors
clust_se_resp <- coeftest(model_cj_repr, vcov. = vcovCL, cluster = ~ ResponseId, data = df_responsiblity)

# print the table using stargazer
stargazer(model_cj_repr,
          se = list(clust_se_resp[, "Std. Error"]),
          type = "text", 
          title = "Responsibility and Ethnicity interaction. Clustered Standard Errors (HC1)",
          covariate.labels = c("50 y/o (ref = 30 y/o)", 
                               "Woman (ref = Man)", 
                               "Have kids under 18 (ref = No children)", 
                               "Journalist (ref = Office worker)",
                               "Health worker (ref = Office worker)",
                               "Political motivation (ref = Economic motivation)", 
                               "Political motivation + repressions (ref = Economic motivation)",
                               "Ukrainian origin (ref = Russian origin)", 
                               "Buryat origin (ref = Russian origin)",
                               "Responsibility (continuous)",
                               "Ukrainian origin X Responsibility (continuous)",
                               "Buryat origin X Responsibility (continuous)"))




# Alternative using index
model_cj_index <- lm(choice ~ Age*guilt_resp_index + Gender*guilt_resp_index + Children*guilt_resp_index + Profession*guilt_resp_index + Age*guilt_resp_index + Motivation*guilt_resp_index + Ethnicity*guilt_resp_index, 
                     data = df_index)

model_cj_index <- lm(choice ~ Age*guilt_resp_index + Gender*guilt_resp_index + Children*guilt_resp_index + Profession*guilt_resp_index + Age*guilt_resp_index + Motivation*guilt_resp_index + Ethnicity*guilt_resp_index, 
                     data = df_index)

# Alternative with controls
model_cj_index_control <- lm(choice ~ Age*guilt_resp_index + Gender*guilt_resp_index + Children*guilt_resp_index + Profession*guilt_resp_index + Age*guilt_resp_index + Motivation*guilt_resp_index + Ethnicity*guilt_resp_index +
                       sex + age_respondent + politics_interest_ru_num + income_consum_num + polit_civic_index,
                     data = df_index)


# calculate the clustered standard errors
clust_se <- coeftest(model_cj_index, 
                     vcov. = vcovCL,  # by default uses HC1
                     cluster = ~ ResponseId, 
                     data = df_index)

# print the table using stargazer
stargazer(model_cj_index,
          se = list(clust_se[, "Std. Error"]), 
          type = "text", 
          title = "Guilt and Ethnicity interaction. Clustered Standard Errors (HC1)",
          covariate.labels = c("50 y/o (ref = 30 y/o)", 
                               "Woman (ref = Man)", 
                               "Have kids under 18 (ref = No children)", 
                               "Journalist (ref = Office worker)",
                               "Health worker (ref = Office worker)",
                               "Political motivation (ref = Economic motivation)", 
                               "Political motivation + repressions (ref = Economic motivation)",
                               "Ukrainian origin (ref = Russian origin)", 
                               "Buryat origin (ref = Russian origin)",
                               "Index Guilt/responsibility (continuous)",
                               "Ukrainian origin X Index (continuous)",
                               "Buryat origin X Index (continuous)"))





# ALL FINE GRAINED ANALYSIS HERE
# Index of guilt and responsibility

# calculate conditional AMCEs for all guilt levels and Regression for continuous index of guilt and resp

df_index <- df_mig %>%
  filter(when_left %in% "Да") %>% #filter out old emigrants
  select(ResponseId, 
         choice,
         Age, Gender, Children, Profession, Ethnicity, Motivation,
         guilt_eng, responsiblity_eng,
         # some controls
         sex, age_respondent, politics_interest_ru_num, income_consum_num, polit_civic_index,
         repress_total, repress_bin, polit_civic_bin, polit_civic_bin_mean
  ) %>%
  mutate(guilt_num = as.numeric(guilt_eng),
         responsiblity_num = as.numeric(responsiblity_eng),
         guilt_resp_index = guilt_num + responsiblity_num,
         guilt_resp_bin = factor(case_when(guilt_resp_index > 6 ~ 1,
                                           guilt_resp_index <= 6 ~ 0)))
#install.packages("psych")          # run once
library(psych)


# Checking if index is a something worth constructing here
items <- df_index[, c("guilt_num", "responsiblity_num")]
psych::alpha(items) # alpha is 0.76
cor.test(df_index$guilt_num, df_index$responsiblity_num) # correlation is 0.62
# there are good reason to believe that this is kind of index



### GUILT RESPONSIBILITY AS INDEX

model_cj_index <- lm(choice ~ Age + Gender + Children + Profession + Motivation + Ethnicity*guilt_resp_index, 
                     data = df_index)

# calculate the clustered standard errors
clust_se <- coeftest(model_cj_index, 
                     vcov. = vcovCL,  # by default uses HC1
                     cluster = ~ ResponseId, 
                     data = df_index)

# with controls
model_cj_index_controls <- lm(choice ~ Age + Gender + Children + Profession + Motivation + Ethnicity*guilt_resp_index +
                                Ethnicity*politics_interest_ru_num +
                                Ethnicity*polit_civic_index + 
                                Ethnicity*repress_total, 
                              data = df_index)


# calculate the clustered standard errors
clust_se_controls <- coeftest(model_cj_index_controls, 
                              vcov. = vcovCL,  # by default uses HC1
                              cluster = ~ ResponseId, 
                              data = df_index)


stargazer(model_cj_index, model_cj_index_controls,
          se    = list(clust_se[, "Std. Error"], clust_se_controls[, "Std. Error"]),          # <- list of SE vectors
          type  = "text",
          title = "AMCE of Ethnicity conditional on Guilt/responsibility index (OLS, clustered on respondent)",
          omit = omit_regex <- paste0("^(", paste(c("Age", "Gender", "Children", "Profession", "Motivation"), collapse = "|"), ")"),
          column.labels = c("Base model", "Controls"),
          omit.stat = c("f"),
          star.cutoffs = c(0.05, 0.01, 0.001),
          single.row = TRUE,
          add.lines     = list(
            c("Other conjoint attribute contolled",
              "Yes", "Yes")),
            covariate.labels = c(
              "Ukrainian origin (ref = Russian origin)",
              "Buryat origin (ref = Russian origin)",
              "Guilt/responsibility index",
              "Political interest",
              "Political engagement (index)",
              "Index of repression",
              "Ukrainian origin x Guilt/responsibility index",
              "Buryat origin x Guilt/responsibility index",
              "Ukrainian origin x Political interest",
              "Buryat origin x Political interest",
              "Ukrainian origin x Political engagement (index)",
              "Buryat origin x Political engagement (index)",
              "Ukrainian origin x Index of repression",
              "Buryat origin x Index of repression")
)  

# Notes for this table:
# The guilt/responsibility index is measured as the sum of responses indicating the extent to which the respondent feels collective guilt (1 to 5) and responsibility (1 to 5).
# The political engagement index is measured as the sum of various political and civic activities the respondent was involved in during the last three months in emigration (e.g., signing petitions, volunteering, participating in coordinated or uncoordinated rallies, donating to Russian NGOs, helping Russian emigrants, Ukrainian refugees, etc.), ranging from 0 to 7.
# The repression experience index is measured as the sum of different types of political pressure (arrests, threats at work/university, searches at home/workplace, threats from pro-government groups, attacks from pro-government groups), ranging from 0 to 6.
# Political interest is measured as the extent to which a person is interested in Russian politics, ranging from 0 (not at all interested) to 3 (very interested).


### POLITICAL ENGAGEMENT

model_cj_index <- lm(choice ~ Age + Gender + Children + Profession+ Ethnicity + Motivation*polit_civic_index, 
                     data = df_index)

# calculate the clustered standard errors
clust_se <- coeftest(model_cj_index, 
                     vcov. = vcovCL,  # by default uses HC1
                     cluster = ~ ResponseId, 
                     data = df_index)

model_cj_bin <- lm(choice ~ Age + Gender + Children + Profession + Ethnicity + Motivation*polit_civic_bin, 
                       data = df_index)

# calculate the clustered standard errors
clust_se_bin <- coeftest(model_cj_bin, 
                       vcov. = vcovCL,  # by default uses HC1
                       cluster = ~ ResponseId, 
                       data = df_index)

model_cj_binmean <- lm(choice ~ Age + Gender + Children + Profession+ Ethnicity + Motivation*polit_civic_bin_mean, 
                   data = df_index)

# calculate the clustered standard errors
clust_se_binmean <- coeftest(model_cj_binmean, 
                         vcov. = vcovCL,  # by default uses HC1
                         cluster = ~ ResponseId, 
                         data = df_index)

# with interest
model_cj_binmean_interest<- lm(choice ~ Age + Gender + Children + Profession + Ethnicity  +
                                  Motivation*politics_interest_ru_num,
                                  #Motivation*guilt_resp_index + 
                                  #Motivation*repress_total, 
                              data = df_index)


# calculate the clustered standard errors
clust_se_binmean_interest<- coeftest(model_cj_binmean_interest, 
                              vcov. = vcovCL,  # by default uses HC1
                              cluster = ~ ResponseId, 
                              data = df_index)

# with controls
model_cj_binmean_controls<- lm(choice ~ Age + Gender + Children + Profession + Ethnicity  + Motivation*polit_civic_index +
                                 Motivation*politics_interest_ru_num +
                               Motivation*guilt_resp_index + 
                               Motivation*repress_total, 
                               data = df_index)


# calculate the clustered standard errors
clust_se_binmean_controls <- coeftest(model_cj_binmean_controls, 
                                     vcov. = vcovCL,  # by default uses HC1
                                     cluster = ~ ResponseId, 
                                     data = df_index)

stargazer(model_cj_index, model_cj_bin, model_cj_binmean, model_cj_binmean_interest,
          se    = list(clust_se[, "Std. Error"], 
                       clust_se_bin[, "Std. Error"], 
                       clust_se_binmean[, "Std. Error"], 
                       clust_se_binmean_interest[, "Std. Error"]),          # <- list of SE vectors
          type  = "text",
          title = "Preference for politically motivated help-seekers, conditional on political engagement (standard errors clustered by respondent).",
          omit = omit_regex <- paste0("^(", paste(c("Age", "Gender", "Children", "Profession", "Ethnicity"), collapse = "|"), ")"),
          column.labels = c("Pol. Index", "Pol.Binary", "Pol. Binary (mean)", "Political Interest"),
          omit.stat = c("f"),
          star.cutoffs = c(0.05, 0.01, 0.001),
          single.row = TRUE,
          add.lines     = list(
            c("Other conjoint attribute contolled",
              "Yes", "Yes", "Yes", "Yes", "Yes")),
          covariate.labels = c(
            "Political motivation (ref = Economic motivation)",
            "Political motivation + repressions",
            "Political engagement (index)", 
            "Political motivationx x Political engagement (index)",
            "Political motivation + repressions x Political engagement (index)",
            "Political engagement experience", 
            "Political motivationx x Political engagement experience",
            "Political motivation + repressions x Political engagement experience",
            "Political engagement (index binarized at the mean)", 
            "Political motivationx x Political engagement (index binarized at the mean)",
            "Political motivation + repressions x Political engagement (index binarized at the mean)",
            "Political interest", 
            "Political motivationx x Political interest",
            "Political motivation + repressions x interest"
            )
            # "Index Guilt/responsibility (continuous)",
            # "Political interest",
            # "Index of political/civic engagement",
            # "Index of repression",
            # "Ukrainian origin X Index Guilt/responsibility",
            # "Buryat origin X Index Guilt/responsibility",
            # "Ukrainian origin X Political interest",
            # "Buryat origin X Political interest",
            # "Ukrainian origin X Index of political/civic engagement",
            # "Buryat origin X Index of political/civic engagement",
            # "Ukrainian origin X Index of repression",
            # "Buryat origin X Index of repression")
)  

# Notes for this table:
# The political engagement index is measured as the sum of various political and civic activities the respondent was involved in during the last three months in emigration (e.g., signing petitions, volunteering, participating in coordinated or uncoordinated rallies, donating to Russian NGOs, helping Russian emigrants, Ukrainian refugees, etc.), ranging from 0 to 7.
# Political engagement (index binarized at the mean) is measured in the same way as before, but the index is converted into a binary variable (0 or 1) depending on whether the number of activities is below or above the sample mean.
# Political engagement experience is measured as a binary variable, coded 0 if there was no such activity in the past three months and 1 if there was at least one such activity.
# Political interest is measured as the extent to which a person is interested in Russian politics, ranging from 0 (not at all interested) to 3 (very interested).



stargazer(model_cj_binmean_controls,
          se    = list(clust_se_binmean_controls[, "Std. Error"]),          # <- list of SE vectors
          type  = "text",
          title = "Preference for politically motivated help-seekers, conditional on political engagement (standard errors clustered by respondent).",
          omit = omit_regex <- paste0("^(", paste(c("Age", "Gender", "Children", "Profession", "Ethnicity"), collapse = "|"), ")"),
          column.labels = c("Base model", "Controls", "Controls", "Controls", "Controls"),
          omit.stat = c("f"),
          star.cutoffs = c(0.05, 0.01, 0.001),
          single.row = TRUE,
          add.lines     = list(
            c("Other conjoint attribute contolled",
              "Yes", "Yes", "Yes", "Yes", "Yes")),
          covariate.labels = c(
            "Political motivation (ref = Economic motivation)",
            "Political motivation + repressions",
            "Political engagement (index)",
            "Political interest",
            "Guilt/responsibility (index)",
            "Experience of repression (index)",
            "Political motivationx x Political engagement (index)",
            "Political motivation + repressions x Political engagement (index)",
            "Political motivationx x Political interest",
            "Political motivation + repressions x Political interest",
            "Political motivationx x Guilt/responsibility (index)",
            "Political motivation + repressions x Guilt/responsibility (index)",
            "Political motivationx x Experience of repression (index)",
            "Political motivation + repressions x Experience of repression (index)"
          )
)  

# Notes for this table:
# The guilt/responsibility index is measured as the sum of responses indicating the extent to which the respondent feels collective guilt (1 to 5) and responsibility (1 to 5).
# Political engagement (index binarized at the mean) is measured as the sum of various political and civic activities the respondent was involved in during the last three months, but the index is converted into a binary variable (0 or 1) depending on whether the number of activities is below or above the sample mean.
# The repression experience index is measured as the sum of different types of political pressure (arrests, threats at work/university, searches at home/workplace, threats from pro-government groups, attacks from pro-government groups), ranging from 0 to 6.
# Political interest is measured as the extent to which a person is interested in Russian politics, ranging from 0 (not at all interested) to 3 (very interested).

